package util.htmlCleaner;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;

import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.HtmlSerializer;
import org.htmlcleaner.JDomSerializer;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlparser.nodes.TagNode;

/**
 * Small command-line driver that runs an HTML file through HtmlCleaner and
 * prints the re-serialized markup to standard output.
 */
public class HTMLCleaner {

	/** Input file used when no path is supplied on the command line. */
	private static final String DEFAULT_INPUT =
			"/proj/dbNoBackup/pjbarrio/Experiments/Wrappers/QueryResults/training/1024/20.html";

	/**
	 * Cleans one HTML file with default {@link CleanerProperties}, serializes
	 * the resulting tree with a {@link SimpleHtmlSerializer} and prints the
	 * serialized text to {@code System.out}.
	 *
	 * @param args optional; {@code args[0]} is the path of the HTML file to
	 *             clean. When absent, {@link #DEFAULT_INPUT} is used.
	 * @throws IOException if the input file cannot be read or the
	 *                     serialization fails
	 */
	public static void main(String[] args) throws IOException {

		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

		CleanerProperties props = new CleanerProperties();

		HtmlCleaner cleaner = new HtmlCleaner(props);

		// Fully qualified: the file also imports org.htmlparser.nodes.TagNode.
		org.htmlcleaner.TagNode node = cleaner.clean(new File(inputPath));

		HtmlSerializer htmlSerializer = new SimpleHtmlSerializer(props);

		StringWriter sw = new StringWriter();

		htmlSerializer.write(node, sw, "UTF-8");

		// Emit the serialized document; previously only an empty line was
		// printed and the content collected in the writer was discarded.
		System.out.println(sw.toString());
	}

}
